from constants import *
from utils import evaluate_model_policy, plot_study, plot_fig
from trainer import get_trained_model
import optuna
from environment import StreetFighterEnv
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from actor_critic import A2CCNNPolicy
from feature_extractors import CNNExtractorWithAttention, CNNExtractor
from tuner import Tuner
import os
from layers import ActorCriticLayer
# Shared run settings for the tuning experiments in this notebook.

# Value handed to every Tuner as its `timesteps` argument.
TIMESTEPS = 5_000_000

# Number of Optuna trials requested from each `tune_study` call.
N_TRIALS = 5

# Plotly config forwarded to `plot.show(...)`; `staticPlot` asks Plotly to
# render the figures without interactivity.
PLOTLY_CONFIG = {"staticPlot": True}
# Experiment 1: A2C hyperparameter search on an environment created with
# capture_movement=False. `model_dir` is handed to the Tuner as `save_dir`.
model_dir = 'models/bias'
model = A2C
policy_network = A2CCNNPolicy
env = StreetFighterEnv(capture_movement=False, training=True)

# Policy options: attention-based CNN feature extractor (features_dim=512)
# combined with the project's ActorCriticLayer.
policy_kwargs = {
    "features_extractor_class": CNNExtractorWithAttention,
    "features_extractor_kwargs": {"features_dim": 512},
    "actor_critic_class": ActorCriticLayer,
}

tuner = Tuner(
    model=model,
    env=env,
    policy_network=policy_network,
    policy_args=policy_kwargs,
    timesteps=TIMESTEPS,
    save_dir=model_dir,
)

# tune_study yields the study object plus a (name, location) pair.
study, (study_name, study_location) = tuner.tune_study(
    n_trials=N_TRIALS,
    study_name="study_bias",
)
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: ashutosht (use `wandb login --relogin` to force relogin)
wandb: wandb version 0.12.14 is available! To upgrade, please run: wandb: $ pip install wandb --upgrade
[I 2022-04-19 01:41:48,548] A new study created in RDB with name: study_bias
[I 2022-04-19 03:57:09,610] Trial 0 finished with value: 2530452.0 and parameters: {'gamma': 0.8048919233171006, 'learning_rate': 0.0001974081547656255, 'gae_lambda': 0.8988739824948455}. Best is trial 0 with value: 2530452.0.
[I 2022-04-19 06:11:37,433] Trial 1 finished with value: 1192052.0 and parameters: {'gamma': 0.8147920048601043, 'learning_rate': 0.0003085066107283282, 'gae_lambda': 0.875886745296491}. Best is trial 0 with value: 2530452.0.
[I 2022-04-19 08:26:28,010] Trial 2 finished with value: 1210642.0 and parameters: {'gamma': 0.8487415057475693, 'learning_rate': 0.0003495630313306731, 'gae_lambda': 0.862982684998839}. Best is trial 0 with value: 2530452.0.
[I 2022-04-19 10:41:46,849] Trial 3 finished with value: 2562722.0 and parameters: {'gamma': 0.8012718649878539, 'learning_rate': 0.00012279781289135615, 'gae_lambda': 0.8161525857987253}. Best is trial 3 with value: 2562722.0.
[I 2022-04-19 12:57:19,278] Trial 4 finished with value: 2563962.0 and parameters: {'gamma': 0.8074138106735396, 'learning_rate': 0.00017021419853109396, 'gae_lambda': 0.8787060424267222}. Best is trial 4 with value: 2563962.0.
# Render every figure that plot_study produces for the finished study.
# The loop body must be indented under the `for` header; at the same column
# as the header it is an IndentationError.
plots = plot_study(study)
for plot in plots:
    # "notebook" selects the renderer; PLOTLY_CONFIG is passed through as
    # the figure config (presumably Plotly figures — confirm in utils).
    plot.show("notebook", config=PLOTLY_CONFIG)
# Experiment 2: same A2C search as before, but on an environment created
# with capture_movement=True. `model_dir` is handed to the Tuner as
# `save_dir`.
model_dir = 'models/bias_with_movement'
model = A2C
policy_network = A2CCNNPolicy
env = StreetFighterEnv(capture_movement=True, training=True)

# Policy options: attention-based CNN feature extractor (features_dim=512)
# combined with the project's ActorCriticLayer.
policy_kwargs = {
    "features_extractor_class": CNNExtractorWithAttention,
    "features_extractor_kwargs": {"features_dim": 512},
    "actor_critic_class": ActorCriticLayer,
}

tuner = Tuner(
    model=model,
    env=env,
    policy_network=policy_network,
    policy_args=policy_kwargs,
    timesteps=TIMESTEPS,
    save_dir=model_dir,
)

# tune_study yields the study object plus a (name, location) pair.
study, (study_name, study_location) = tuner.tune_study(
    n_trials=N_TRIALS,
    study_name="study_bias_with_movement",
)
| gae_lambda | █▆▅▁▆ |
| gamma | ▂▃█▁▂ |
| learning_rate | ▃▇█▁▂ |
| value | █▁▁██ |
| gae_lambda | 0.87871 |
| gamma | 0.80741 |
| learning_rate | 0.00017 |
| value | 2563962.0 |
./wandb/run-20220419_014142-12kbs1dw/logs
wandb: wandb version 0.12.14 is available! To upgrade, please run: wandb: $ pip install wandb --upgrade
[I 2022-04-19 12:57:36,848] A new study created in RDB with name: study_bias_with_movement
[I 2022-04-19 15:12:22,456] Trial 0 finished with value: -195378.0 and parameters: {'gamma': 0.8387448831050915, 'learning_rate': 0.0003517968715652912, 'gae_lambda': 0.894679240005201}. Best is trial 0 with value: -195378.0.
[I 2022-04-19 17:27:47,059] Trial 1 finished with value: 1210642.0 and parameters: {'gamma': 0.8048035975853882, 'learning_rate': 0.00014066916091871157, 'gae_lambda': 0.882581628377622}. Best is trial 1 with value: 1210642.0.
[I 2022-04-19 19:42:51,749] Trial 2 finished with value: -336708.0 and parameters: {'gamma': 0.8078138993145738, 'learning_rate': 0.0001188753797282247, 'gae_lambda': 0.8861808937953406}. Best is trial 1 with value: 1210642.0.
[I 2022-04-19 21:57:29,712] Trial 3 finished with value: -8996368.0 and parameters: {'gamma': 0.8139553433095731, 'learning_rate': 0.00022537483040517404, 'gae_lambda': 0.8309297000576678}. Best is trial 1 with value: 1210642.0.
[I 2022-04-20 00:12:51,934] Trial 4 finished with value: 2562722.0 and parameters: {'gamma': 0.8000631888523035, 'learning_rate': 0.0002663649075073809, 'gae_lambda': 0.803249112105675}. Best is trial 4 with value: 2562722.0.
# Render every figure that plot_study produces for the finished study.
# The loop body must be indented under the `for` header; at the same column
# as the header it is an IndentationError.
plots = plot_study(study)
for plot in plots:
    # "notebook" selects the renderer; PLOTLY_CONFIG is passed through as
    # the figure config (presumably Plotly figures — confirm in utils).
    plot.show("notebook", config=PLOTLY_CONFIG)